#################################################################
## Military aid, regime vulnerability, and political violence  ##
##                                                             ##
## Andrew Boutton                                              ##
##                                                             ##
## Figure 1                                                    ##
##                                                             ##
## Version: 7/24/2019                                          ##
#################################################################



library(ggplot2)
library(foreign)
    
p2 = read.dta("coupprob.dta") # read in data - this is the same as the main data with missing values for predicted coup probability (p2) removed.

summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                             idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {

     # Ensure that the betweenvars and withinvars are factors
     factorvars <- vapply(data[, c(betweenvars, withinvars), drop=FALSE],
                          FUN=is.factor, FUN.VALUE=logical(1))

     if (!all(factorvars)) {
         nonfactorvars <- names(factorvars)[!factorvars]
         message("Automatically converting the following non-factors to factors: ",
                 paste(nonfactorvars, collapse = ", "))
         data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
     }

     # Get the means from the un-normed data
     datac <- summarySE(data, measurevar, groupvars=c(betweenvars, withinvars),
                        na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

     # Drop all the unused columns (these will be calculated with normed data)
     datac$sd <- NULL
     datac$se <- NULL
     datac$ci <- NULL

     # Norm each subject's data
     ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)

     # This is the name of the new column
     measurevar_n <- paste(measurevar, "_norm", sep="")

     # Collapse the normed data - now we can treat between and within vars the same
     ndatac <- summarySE(ndata, measurevar_n, groupvars=c(betweenvars, withinvars),
                         na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

     # Apply correction from Morey (2008) to the standard error and confidence interval
     #  Get the product of the number of conditions of within-S variables
     nWithinGroups    <- prod(vapply(ndatac[,withinvars, drop=FALSE], FUN=nlevels,
                                     FUN.VALUE=numeric(1)))
     correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )

     # Apply the correction factor
     ndatac$sd <- ndatac$sd * correctionFactor
     ndatac$se <- ndatac$se * correctionFactor
     ndatac$ci <- ndatac$ci * correctionFactor

     # Combine the un-normed means with the normed results
     merge(datac, ndatac)
 }

summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                       conf.interval=.95, .drop=TRUE) {
     library(plyr)

     # New version of length which can handle NA's: if na.rm==T, don't count them
     length2 <- function (x, na.rm=FALSE) {
         if (na.rm) sum(!is.na(x))
         else       length(x)
     }

     # This does the summary. For each group's data frame, return a vector with
     # N, mean, and sd
     datac <- ddply(data, groupvars, .drop=.drop,
                    .fun = function(xx, col) {
                        c(N    = length2(xx[[col]], na.rm=na.rm),
                          mean = mean   (xx[[col]], na.rm=na.rm),
                          sd   = sd     (xx[[col]], na.rm=na.rm)
                        )
                    },
                    measurevar
     )

     # Rename the "mean" column
     datac <- rename(datac, c("mean" = measurevar))

     datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

     # Confidence interval multiplier for standard error
     # Calculate t-statistic for confidence interval:
     # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
     ciMult <- qt(conf.interval/2 + .5, datac$N-1)
     datac$ci <- datac$se * ciMult

     return(datac)
 }

normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                            na.rm=FALSE, .drop=TRUE) {
     library(plyr)

     # Measure var on left, idvar + between vars on right of formula.
     data.subjMean <- ddply(data, c(idvar, betweenvars), .drop=.drop,
                            .fun = function(xx, col, na.rm) {
                                c(subjMean = mean(xx[,col], na.rm=na.rm))
                            },
                            measurevar,
                            na.rm
     )

     # Put the subject means with original data
     data <- merge(data, data.subjMean)

     # Get the normalized data in a new column
     measureNormedVar <- paste(measurevar, "_norm", sep="")
     data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
         mean(data[,measurevar], na.rm=na.rm)

     # Remove this subject mean column
     data$subjMean <- NULL

     return(data)
 }

stderr <- summarySE(p2, measurevar="p2", groupvars=c("regimeageord","regimetype"))


stderr2 <- stderr
stderr2$regimetype <- factor(stderr2$regimetype)
stderr2$regimeageord <- factor(stderr2$regimeageord)   

  
ggplot(stderr2, aes(x=regimetype, y=p2, fill=regimeageord)) + 
    geom_bar(position=position_dodge(), stat="identity") +
    geom_errorbar(aes(ymin=p2-ci, ymax=p2+ci),
                  width=.2,                    # Width of the error bars
                  position=position_dodge(.9))  +
    ylab("Pr(successful coup)") + xlab("Regime type") +
    scale_fill_manual(name="Regime age",# Legend label, use darker colors
                   breaks=c(1,2, 3, 4),
                   labels=c("< 6 years", "6-10 years", "11-20 years","> 20 years"),
                   values=c("seagreen4", "seagreen3", "seagreen2", "seagreen1")) +
    ggtitle("Probability of successful regime-change coups over time") +
    scale_y_continuous(limits=c(0,0.06)) +  
    theme_bw() + scale_x_discrete(labels=c("1" = "Democracy", "2" = "Single-party", "3" = "Personalist", "4"="Junta", "5"="Monarchy")) +  
    theme(axis.text.x = element_text(size=15), axis.title.y = element_text(size=20, face="bold", margin=margin(0,20,0,0)), axis.text.y = element_text(size=15, face="bold"), axis.title.x = element_text(size=15, margin=margin(20,0,0,0)),plot.title = element_text(size = 20, face = "bold"))      
             

                       
                            
                            


    
    


